rm(list = ls())
setwd("D:\\Google Drive\\TAMU\\Projets\\Hari\\Dissertation\\Paper 1")
library(tidyverse)
library(fBasics)
library(epiDisplay)
library(readxl)
input_data_main <- read_excel("Data/Sales Pipeline 20211011.xlsx", sheet = "report1633931123059")
dim(input_data_main)[1]
## [1] 5338
# Keep only lost or won bids
input_data <- input_data_main %>%dplyr::filter(Stage == "Closed Lost" |Stage == "Closed Won" )
dim(input_data)[1]
## [1] 2875
# Keep only bids between 2016 and 2021
data_till_2021 <-input_data %>%dplyr::filter(`Fiscal Year Sodexo` <=2021 &`Fiscal Year Sodexo` >=2016 )
dim(data_till_2021)[1]
## [1] 2476
data_till_2021 <- data_till_2021%>%drop_na(`Service Level 1`)
dim(data_till_2021)[1]
## [1] 2476
data_till_2021_new <- data_till_2021
data_till_2021_new <- data_till_2021_new%>% mutate(`Old Provider` =
case_when(`REP-Ultimate Parent Provided by Name` == "Aramark Group" ~ "Aramark",
`REP-Ultimate Parent Provided by Name` == "Compass Group" ~ "Compass",
`REP-Ultimate Parent Provided by Name` == "Sodexo Group" ~ "Sodexo"))%>%
dplyr::mutate(`Old Provider` = replace_na(`Old Provider`,"Other"))
data_till_2021_new <- data_till_2021_new%>% mutate(`New Provider` =
case_when(`REP-Service Ultimate Parent Awarded to` == "Aramark Group" ~ "Aramark",
`REP-Service Ultimate Parent Awarded to` == "Compass Group" ~ "Compass",
`REP-Service Ultimate Parent Awarded to` == "Sodexo Group" ~ "Sodexo"))%>%
dplyr::mutate(`New Provider` = replace_na(`New Provider`,"Other"))
tab1(data_till_2021_new$`Old Provider`)
## data_till_2021_new$`Old Provider` :
## Frequency Percent Cum. percent
## Aramark 69 2.8 2.8
## Compass 216 8.7 11.5
## Other 1468 59.3 70.8
## Sodexo 723 29.2 100.0
## Total 2476 100.0 100.0
tab1(data_till_2021_new$`New Provider`)
## data_till_2021_new$`New Provider` :
## Frequency Percent Cum. percent
## Aramark 69 2.8 2.8
## Compass 222 9.0 11.8
## Other 933 37.7 49.4
## Sodexo 1252 50.6 100.0
## Total 2476 100.0 100.0
library(dummies)
# New Provider - Dummy coding
new_provider <- as.data.frame(data_till_2021_new$`New Provider`)
colnames(new_provider)<- c("new_provider")
data_till_2021_new <- cbind(data_till_2021_new, dummy.data.frame(new_provider, sep = "_"))
# Creating Yeas<= 2016 Dummy
data_till_2021_new <- data_till_2021_new%>% mutate(`year_greater_than_2016` =
case_when(`Fiscal Year Sodexo` <= 2016 ~ 0,
`Fiscal Year Sodexo` > 2016 ~ 1))
data_till_2021_new$year_greater_than_2016 <- as.factor(data_till_2021_new$year_greater_than_2016)
data_till_2021_new$year_greater_than_2016 <- relevel(data_till_2021_new$year_greater_than_2016 , ref = "0")
# Factorizing Opportunity Global Region
data_till_2021_new$`Opportunity Global Region` <- as.factor(data_till_2021_new$`Opportunity Global Region`)
data_till_2021_new$`Opportunity Global Region` <- relevel(data_till_2021_new$`Opportunity Global Region`, ref = "NORAM" )
# Factorizing SL1
data_till_2021_new$`Service Level 1` <- as.factor(data_till_2021_new$`Service Level 1`)
data_till_2021_new$`Service Level 1` <- relevel(data_till_2021_new$`Service Level 1`, ref = "Hard FM" )
# Factorizing Stategic Segment 1
data_till_2021_new$`Strategic Segment L1` <- as.factor(data_till_2021_new$`Strategic Segment L1`)
data_till_2021_new$`Strategic Segment L1` <- relevel(data_till_2021_new$`Strategic Segment L1`, ref = "Mining" )
# Factorizing Regional strategic Account
data_till_2021_new$`Regional Strategic Account` <- as.factor(data_till_2021_new$`Regional Strategic Account`)
data_till_2021_new$`Regional Strategic Account` <- relevel(data_till_2021_new$`Regional Strategic Account` , ref = "0")
# Converting Revenue (Annual) (converted) into Millions of Euros
data_till_2021_new$`Revenue (Annual) (converted) in Mill Euros`<- data_till_2021_new$`Revenue (Annual) (converted)`/1e6
data_till_2021_new <- data_till_2021_new%>% drop_na(`Revenue (Annual) (converted) in Mill Euros`)
# Factorizing Old Provider
data_till_2021_new$`Old Provider` <- as.factor(data_till_2021_new$`Old Provider`)
data_till_2021_new$`Old Provider` <- relevel(data_till_2021_new$`Old Provider`, ref = "Other" )
# Factorizing New Provider
data_till_2021_new$`New Provider` <- as.factor(data_till_2021_new$`New Provider`)
data_till_2021_new$`New Provider` <- relevel(data_till_2021_new$`New Provider`, ref = "Other" )
setwd("D:\\Google Drive\\TAMU\\Projets\\Hari\\Dissertation\\Paper 1")
Startegic_Emphasis_Data <- read_excel("Data/Startegic Emphasis Data.xlsx",sheet = "Sheet1")
library("reshape2")
Startegic_Emphasis_Data <- dcast(melt(Startegic_Emphasis_Data, id.vars=c("Fiscal Year Sodexo", "Company")), `Fiscal Year Sodexo`~variable+Company)
Startegic_Emphasis_Data_Combined <- read_excel("Data/Startegic Emphasis Data.xlsx",sheet = "Combined")
Startegic_Emphasis_Data_Combined <- dcast(melt(Startegic_Emphasis_Data_Combined, id.vars=c("Fiscal Year Sodexo", "Company")), `Fiscal Year Sodexo`~variable+Company)
library(caret)
library(splitTools)
library(ranger)
set.seed(12345)
set.seed(3451)
inds <- partition(data_till_2021_new$`New Provider`, p = c(train = 0.8, test = 0.2))
#str(inds)
train_main <- data_till_2021_new[inds$train, ]
test_main <- data_till_2021_new[inds$test, ]
outcome <- "New Provider"
variables <- c("Old Provider","Opportunity Global Region","Service Level 1","Strategic Segment L1","Regional Strategic Account","Revenue (Annual) (converted) in Mill Euros","Margin (GP/BR)" )
all_variables <- c(outcome, variables)
train <- train_main %>% dplyr::select(all_of(all_variables))
test <- test_main %>% dplyr::select(all_of(all_variables))
colnames(train) <- c("New.Provider","Old.Provider","Opportunity.Global.Region","Service.Level.1","Strategic.Segment.L1","Regional.Strategic.Account","Revenue.Mill.Euros","Margin" )
colnames(test) <- c("New.Provider","Old.Provider","Opportunity.Global.Region","Service.Level.1","Strategic.Segment.L1","Regional.Strategic.Account","Revenue.Mill.Euros","Margin" )
library(randomForest)
modfit.rf <- randomForest(New.Provider ~. , data=train)
## GBM
library(caret)
myControl <- trainControl(method='cv', number=5, returnResamp='none')
labelName <- 'New.Provider'
predictors <- names(train)[names(train) != labelName]
modfit.gbm <- train(train[,predictors], train[,labelName], method='gbm', trControl=myControl)
f <- as.formula(paste(labelName, paste(predictors, collapse = " + "),sep = "~"))
modfit.mlogit <- multinom(f, data = train)
library(e1071)
modfit.naivebayes <- e1071::naiveBayes(New.Provider ~ ., data = train)
modfit.svm <- e1071::svm(New.Provider ~ ., data = train,probability = TRUE)
#RF
prediction_rf_train <- predict(modfit.rf, train, type="class")
confusion_matrix_rf_train <- confusionMatrix(prediction_rf_train, train$New.Provider)
confusion_matrix_rf_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 665 6 29 84
## Aramark 1 36 0 7
## Compass 3 2 118 2
## Sodexo 77 11 31 908
##
## Overall Statistics
##
## Accuracy : 0.8722
## 95% CI : (0.8567, 0.8866)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7806
##
## Mcnemar's Test P-Value : 9.862e-10
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.8914 0.65455 0.66292 0.9071
## Specificity 0.9036 0.99584 0.99612 0.8784
## Pos Pred Value 0.8482 0.81818 0.94400 0.8841
## Neg Pred Value 0.9323 0.99019 0.96765 0.9024
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.3359 0.01818 0.05960 0.4586
## Detection Prevalence 0.3960 0.02222 0.06313 0.5187
## Balanced Accuracy 0.8975 0.82519 0.82952 0.8928
prediction_rf_test <- predict(modfit.rf, test, type="class")
confusion_matrix_rf_test <- confusionMatrix(prediction_rf_test, test$New.Provider)
confusion_matrix_rf_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 135 1 15 39
## Aramark 0 11 4 0
## Compass 4 0 16 3
## Sodexo 48 2 9 209
##
## Overall Statistics
##
## Accuracy : 0.748
## 95% CI : (0.7074, 0.7856)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5634
##
## Mcnemar's Test P-Value : 0.008243
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7219 0.78571 0.36364 0.8327
## Specificity 0.8220 0.99170 0.98451 0.7592
## Pos Pred Value 0.7105 0.73333 0.69565 0.7799
## Neg Pred Value 0.8301 0.99376 0.94080 0.8158
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2722 0.02218 0.03226 0.4214
## Detection Prevalence 0.3831 0.03024 0.04637 0.5403
## Balanced Accuracy 0.7720 0.88871 0.67407 0.7959
# GBM
train_preds <- as.data.frame(predict(object=modfit.gbm, train[,predictors]))
confusion_matrix_gbm_train <- confusionMatrix(train_preds$`predict(object = modfit.gbm, train[, predictors])`, train$New.Provider)
confusion_matrix_gbm_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 562 5 34 128
## Aramark 3 43 2 7
## Compass 16 1 99 7
## Sodexo 165 6 43 859
##
## Overall Statistics
##
## Accuracy : 0.7894
## 95% CI : (0.7708, 0.8072)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6375
##
## Mcnemar's Test P-Value : 1.132e-06
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7534 0.78182 0.55618 0.8581
## Specificity 0.8647 0.99377 0.98668 0.7814
## Pos Pred Value 0.7709 0.78182 0.80488 0.8006
## Neg Pred Value 0.8529 0.99377 0.95746 0.8434
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.2838 0.02172 0.05000 0.4338
## Detection Prevalence 0.3682 0.02778 0.06212 0.5419
## Balanced Accuracy 0.8090 0.88779 0.77143 0.8198
test_preds <- as.data.frame(predict(object=modfit.gbm, test[,predictors]))
confusion_matrix_gbm_test <- confusionMatrix(test_preds$`predict(object = modfit.gbm, test[, predictors])`, test$New.Provider)
confusion_matrix_gbm_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 127 1 10 45
## Aramark 0 10 5 0
## Compass 3 0 17 4
## Sodexo 57 3 12 202
##
## Overall Statistics
##
## Accuracy : 0.7177
## 95% CI : (0.6759, 0.757)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5101
##
## Mcnemar's Test P-Value : 0.005795
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.6791 0.71429 0.38636 0.8048
## Specificity 0.8188 0.98963 0.98451 0.7061
## Pos Pred Value 0.6940 0.66667 0.70833 0.7372
## Neg Pred Value 0.8083 0.99168 0.94280 0.7793
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2560 0.02016 0.03427 0.4073
## Detection Prevalence 0.3690 0.03024 0.04839 0.5524
## Balanced Accuracy 0.7490 0.85196 0.68544 0.7555
# MLogit
prediction_mlogit_train <- predict(modfit.mlogit, train, type="class")
confusion_matrix_mlogit_train <- confusionMatrix(prediction_mlogit_train, train$New.Provider)
confusion_matrix_mlogit_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 365 5 24 203
## Aramark 10 21 4 8
## Compass 13 2 57 25
## Sodexo 358 27 93 765
##
## Overall Statistics
##
## Accuracy : 0.6101
## 95% CI : (0.5882, 0.6317)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3087
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.4893 0.38182 0.32022 0.7642
## Specificity 0.8120 0.98857 0.97780 0.5117
## Pos Pred Value 0.6114 0.48837 0.58763 0.6154
## Neg Pred Value 0.7245 0.98245 0.93574 0.6798
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.1843 0.01061 0.02879 0.3864
## Detection Prevalence 0.3015 0.02172 0.04899 0.6278
## Balanced Accuracy 0.6506 0.68519 0.64901 0.6380
prediction_mlogit_test <- predict(modfit.mlogit, test, type="class")
confusion_matrix_mlogit_test <- confusionMatrix(prediction_mlogit_test, test$New.Provider)
confusion_matrix_mlogit_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 88 2 4 45
## Aramark 0 8 3 1
## Compass 2 0 15 11
## Sodexo 97 4 22 194
##
## Overall Statistics
##
## Accuracy : 0.6149
## 95% CI : (0.5705, 0.658)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 6.929e-07
##
## Kappa : 0.3187
##
## Mcnemar's Test P-Value : 3.640e-05
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.4706 0.57143 0.34091 0.7729
## Specificity 0.8350 0.99170 0.97124 0.4980
## Pos Pred Value 0.6331 0.66667 0.53571 0.6120
## Neg Pred Value 0.7227 0.98760 0.93803 0.6816
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.1774 0.01613 0.03024 0.3911
## Detection Prevalence 0.2802 0.02419 0.05645 0.6391
## Balanced Accuracy 0.6528 0.78156 0.65607 0.6354
# Naive Bayes
prediction_naivebayes_train <- predict(modfit.naivebayes, train, type="class")
confusion_matrix_naivebayes_train <- confusionMatrix(prediction_naivebayes_train, train$New.Provider)
confusion_matrix_naivebayes_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 344 1 17 226
## Aramark 25 30 11 30
## Compass 11 0 20 26
## Sodexo 366 24 130 719
##
## Overall Statistics
##
## Accuracy : 0.5621
## 95% CI : (0.5399, 0.5841)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : 2.597e-07
##
## Kappa : 0.2288
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.4611 0.54545 0.11236 0.7183
## Specificity 0.8023 0.96571 0.97947 0.4688
## Pos Pred Value 0.5850 0.31250 0.35088 0.5803
## Neg Pred Value 0.7112 0.98673 0.91784 0.6194
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.1737 0.01515 0.01010 0.3631
## Detection Prevalence 0.2970 0.04848 0.02879 0.6258
## Balanced Accuracy 0.6317 0.75558 0.54591 0.5936
prediction_naivebayes_test <- predict(modfit.naivebayes, test, type="class")
confusion_matrix_naivebayes_test <- confusionMatrix(prediction_naivebayes_test, test$New.Provider)
confusion_matrix_naivebayes_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 85 0 4 41
## Aramark 3 9 7 6
## Compass 3 0 6 4
## Sodexo 96 5 27 200
##
## Overall Statistics
##
## Accuracy : 0.6048
## 95% CI : (0.5603, 0.6481)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 6.144e-06
##
## Kappa : 0.2979
##
## Mcnemar's Test P-Value : 6.262e-09
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.4545 0.64286 0.13636 0.7968
## Specificity 0.8544 0.96680 0.98451 0.4776
## Pos Pred Value 0.6538 0.36000 0.46154 0.6098
## Neg Pred Value 0.7213 0.98938 0.92133 0.6964
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.1714 0.01815 0.01210 0.4032
## Detection Prevalence 0.2621 0.05040 0.02621 0.6613
## Balanced Accuracy 0.6545 0.80483 0.56044 0.6372
## SVM
prediction_svm_train <- predict(modfit.svm, train, type="class")
confusion_matrix_svm_train <- confusionMatrix(prediction_svm_train, train$New.Provider)
confusion_matrix_svm_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 389 4 30 197
## Aramark 9 20 4 7
## Compass 7 3 36 5
## Sodexo 341 28 108 792
##
## Overall Statistics
##
## Accuracy : 0.6247
## 95% CI : (0.603, 0.6461)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.324
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5214 0.36364 0.20225 0.7912
## Specificity 0.8128 0.98961 0.99168 0.5128
## Pos Pred Value 0.6274 0.50000 0.70588 0.6241
## Neg Pred Value 0.7375 0.98196 0.92639 0.7060
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.1965 0.01010 0.01818 0.4000
## Detection Prevalence 0.3131 0.02020 0.02576 0.6409
## Balanced Accuracy 0.6671 0.67662 0.59696 0.6520
prediction_svm_test <- predict(modfit.svm, test, type="class")
confusion_matrix_svm_test <- confusionMatrix(prediction_svm_test, test$New.Provider)
confusion_matrix_svm_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 95 1 9 40
## Aramark 0 9 3 0
## Compass 2 0 9 3
## Sodexo 90 4 23 208
##
## Overall Statistics
##
## Accuracy : 0.6472
## 95% CI : (0.6033, 0.6893)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 1.588e-10
##
## Kappa : 0.3643
##
## Mcnemar's Test P-Value : 1.812e-08
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5080 0.64286 0.20455 0.8287
## Specificity 0.8382 0.99378 0.98894 0.5224
## Pos Pred Value 0.6552 0.75000 0.64286 0.6400
## Neg Pred Value 0.7379 0.98967 0.92739 0.7485
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.1915 0.01815 0.01815 0.4194
## Detection Prevalence 0.2923 0.02419 0.02823 0.6552
## Balanced Accuracy 0.6731 0.81832 0.59674 0.6756
#RF
ens_input_rf_train_predict <- predict(modfit.rf, train, type="prob")
colnames(ens_input_rf_train_predict)<-paste(colnames(ens_input_rf_train_predict),"rf",sep="_")
#GBM
ens_input_gbm_train_predict<- predict(modfit.gbm,train, type="prob")
colnames(ens_input_gbm_train_predict)<-paste(colnames(ens_input_gbm_train_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_train_predict <- predict(modfit.mlogit, train, type="prob")
colnames(ens_input_ml_train_predict)<-paste(colnames(ens_input_ml_train_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, train, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_train_predict <- predict(modfit.svm, train,decision.values = FALSE, probability = TRUE)
ens_input_svm_train_predict<- attr(ens_input_svm_train_predict, "probabilities")
colnames(ens_input_svm_train_predict)<-paste(colnames(ens_input_svm_train_predict),"svm",sep="_")
# Ensemble with main data and predictions
ens_input <- cbind( train,ens_input_rf_train_predict,
ens_input_gbm_train_predict,
ens_input_ml_train_predict,
ens_input_nb_train_predict,
ens_input_svm_train_predict
)
modfit.rf.ens <- randomForest(New.Provider ~. , data=ens_input)
# Ensemble with predictions
New.Provider<- train$New.Provider
ens_input_new <- cbind( New.Provider,ens_input_rf_train_predict,
ens_input_gbm_train_predict,
ens_input_ml_train_predict,
ens_input_nb_train_predict,
ens_input_svm_train_predict
)
modfit.rf.ens_new <- randomForest(New.Provider ~. , data=ens_input_new)
## Functions
ens_prediction <- function(input){
#RF
ens_input_rf_predict <- predict(modfit.rf, input, type="prob")
colnames(ens_input_rf_predict)<-paste(colnames(ens_input_rf_predict),"rf",sep="_")
#GBM
ens_input_gbm_predict<- predict(modfit.gbm,input, type="prob")
colnames(ens_input_gbm_predict)<-paste(colnames(ens_input_gbm_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_predict <- predict(modfit.mlogit, input, type="prob")
colnames(ens_input_ml_predict)<-paste(colnames(ens_input_ml_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, input, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_predict <- predict(modfit.svm, input,decision.values = FALSE, probability = TRUE)
ens_input_svm_predict<- attr(ens_input_svm_predict, "probabilities")
colnames(ens_input_svm_predict)<-paste(colnames(ens_input_svm_predict),"svm",sep="_")
ens_input <- cbind( input,ens_input_rf_predict,
ens_input_gbm_predict,
ens_input_ml_predict,
ens_input_nb_train_predict,
ens_input_svm_predict
)
prediction_rf_ens <- predict(modfit.rf.ens, ens_input, type="class")
return(prediction_rf_ens)
}
ens_new_prediction <- function(input){
#RF
ens_input_rf_predict <- predict(modfit.rf, input, type="prob")
colnames(ens_input_rf_predict)<-paste(colnames(ens_input_rf_predict),"rf",sep="_")
#GBM
ens_input_gbm_predict<- predict(modfit.gbm,input, type="prob")
colnames(ens_input_gbm_predict)<-paste(colnames(ens_input_gbm_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_predict <- predict(modfit.mlogit, input, type="prob")
colnames(ens_input_ml_predict)<-paste(colnames(ens_input_ml_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, input, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_predict <- predict(modfit.svm, input,decision.values = FALSE, probability = TRUE)
ens_input_svm_predict<- attr(ens_input_svm_predict, "probabilities")
colnames(ens_input_svm_predict)<-paste(colnames(ens_input_svm_predict),"svm",sep="_")
ens_input <- cbind( ens_input_rf_predict,
ens_input_gbm_predict,
ens_input_ml_predict,
ens_input_nb_train_predict,
ens_input_svm_predict
)
prediction_rf_ens <- predict(modfit.rf.ens_new, ens_input, type="class")
return(prediction_rf_ens)
}
confusionMatrix(ens_prediction(train), train$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 746 0 2 3
## Aramark 0 55 0 0
## Compass 0 0 176 0
## Sodexo 0 0 0 998
##
## Overall Statistics
##
## Accuracy : 0.9975
## 95% CI : (0.9941, 0.9992)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9957
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 1.0000 1.00000 0.98876 0.9970
## Specificity 0.9959 1.00000 1.00000 1.0000
## Pos Pred Value 0.9933 1.00000 1.00000 1.0000
## Neg Pred Value 1.0000 1.00000 0.99889 0.9969
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.3768 0.02778 0.08889 0.5040
## Detection Prevalence 0.3793 0.02778 0.08889 0.5040
## Balanced Accuracy 0.9980 1.00000 0.99438 0.9985
confusionMatrix(ens_prediction(test), test$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 126 2 12 45
## Aramark 0 9 4 0
## Compass 6 0 24 8
## Sodexo 55 3 4 198
##
## Overall Statistics
##
## Accuracy : 0.7198
## 95% CI : (0.678, 0.7589)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.5222
##
## Mcnemar's Test P-Value : 0.03804
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.6738 0.64286 0.54545 0.7888
## Specificity 0.8091 0.99170 0.96903 0.7469
## Pos Pred Value 0.6811 0.69231 0.63158 0.7615
## Neg Pred Value 0.8039 0.98965 0.95633 0.7754
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2540 0.01815 0.04839 0.3992
## Detection Prevalence 0.3730 0.02621 0.07661 0.5242
## Balanced Accuracy 0.7414 0.81728 0.75724 0.7679
confusionMatrix(ens_new_prediction(train), train$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 746 0 2 3
## Aramark 0 55 0 0
## Compass 0 0 176 0
## Sodexo 0 0 0 998
##
## Overall Statistics
##
## Accuracy : 0.9975
## 95% CI : (0.9941, 0.9992)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9957
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 1.0000 1.00000 0.98876 0.9970
## Specificity 0.9959 1.00000 1.00000 1.0000
## Pos Pred Value 0.9933 1.00000 1.00000 1.0000
## Neg Pred Value 1.0000 1.00000 0.99889 0.9969
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.3768 0.02778 0.08889 0.5040
## Detection Prevalence 0.3793 0.02778 0.08889 0.5040
## Balanced Accuracy 0.9980 1.00000 0.99438 0.9985
confusionMatrix(ens_new_prediction(test), test$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 126 2 12 43
## Aramark 0 9 4 0
## Compass 6 0 25 8
## Sodexo 55 3 3 200
##
## Overall Statistics
##
## Accuracy : 0.7258
## 95% CI : (0.6843, 0.7646)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.5328
##
## Mcnemar's Test P-Value : 0.02236
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.6738 0.64286 0.56818 0.7968
## Specificity 0.8155 0.99170 0.96903 0.7510
## Pos Pred Value 0.6885 0.69231 0.64103 0.7663
## Neg Pred Value 0.8051 0.98965 0.95842 0.7830
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2540 0.01815 0.05040 0.4032
## Detection Prevalence 0.3690 0.02621 0.07863 0.5262
## Balanced Accuracy 0.7447 0.81728 0.76860 0.7739
str_emphasis_names <- colnames(Startegic_Emphasis_Data)
str_emphasis_names <-str_emphasis_names[str_emphasis_names != "Fiscal Year Sodexo"]
all_variables <- c(outcome, variables,str_emphasis_names)
train_main$`Fiscal Year Sodexo` <- as.double(train_main$`Fiscal Year Sodexo` )
train_main_new <- full_join(train_main, Startegic_Emphasis_Data, by = "Fiscal Year Sodexo")
train_main_new <- full_join(train_main_new, Startegic_Emphasis_Data_Combined, by = "Fiscal Year Sodexo")
test_main$`Fiscal Year Sodexo` <- as.double(test_main$`Fiscal Year Sodexo` )
test_main_new <- full_join(test_main, Startegic_Emphasis_Data, by = "Fiscal Year Sodexo")
test_main_new <- full_join(test_main_new, Startegic_Emphasis_Data_Combined, by = "Fiscal Year Sodexo")
outcome <- "New Provider"
variables <- c("Old Provider","Opportunity Global Region","Service Level 1","Strategic Segment L1","Regional Strategic Account","Revenue (Annual) (converted) in Mill Euros","Margin (GP/BR)" )
train <- train_main_new %>% dplyr::select(all_of(all_variables))
test <- test_main_new %>% dplyr::select(all_of(all_variables))
colnames(train) <- c("New.Provider","Old.Provider","Opportunity.Global.Region","Service.Level.1","Strategic.Segment.L1","Regional.Strategic.Account","Revenue.Mill.Euros","Margin",str_emphasis_names )
colnames(test) <- c("New.Provider","Old.Provider","Opportunity.Global.Region","Service.Level.1","Strategic.Segment.L1","Regional.Strategic.Account","Revenue.Mill.Euros","Margin",str_emphasis_names )
library(randomForest)
modfit.rf <- randomForest(New.Provider ~. , data=train)
## GBM
library(caret)
myControl <- trainControl(method='cv', number=5, returnResamp='none')
labelName <- 'New.Provider'
predictors <- names(train)[names(train) != labelName]
modfit.gbm <- train(train[,predictors], train[,labelName], method='gbm', trControl=myControl)
f <- as.formula(paste(labelName, paste(predictors, collapse = " + "),sep = "~"))
modfit.mlogit <- multinom(f, data = train)
library(e1071)
modfit.naivebayes <- e1071::naiveBayes(New.Provider ~ ., data = train)
modfit.svm <- e1071::svm(New.Provider ~ ., data = train,probability = TRUE)
#RF
prediction_rf_train <- predict(modfit.rf, train, type="class")
confusion_matrix_rf_train <- confusionMatrix(prediction_rf_train, train$New.Provider)
confusion_matrix_rf_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 444 4 61 155
## Aramark 0 9 0 0
## Compass 0 0 0 0
## Sodexo 302 42 117 846
##
## Overall Statistics
##
## Accuracy : 0.6561
## 95% CI : (0.6347, 0.677)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3628
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5952 0.163636 0.0000 0.8452
## Specificity 0.8217 1.000000 1.0000 0.5291
## Pos Pred Value 0.6687 1.000000 NaN 0.6473
## Neg Pred Value 0.7705 0.976662 0.9101 0.7697
## Prevalence 0.3768 0.027778 0.0899 0.5056
## Detection Rate 0.2242 0.004545 0.0000 0.4273
## Detection Prevalence 0.3354 0.004545 0.0000 0.6601
## Balanced Accuracy 0.7084 0.581818 0.5000 0.6871
prediction_rf_test <- predict(modfit.rf, test, type="class")
confusion_matrix_rf_test <- confusionMatrix(prediction_rf_test, test$New.Provider)
confusion_matrix_rf_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 120 1 16 39
## Aramark 0 0 0 0
## Compass 0 0 0 0
## Sodexo 67 13 28 212
##
## Overall Statistics
##
## Accuracy : 0.6694
## 95% CI : (0.626, 0.7106)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 1.394e-13
##
## Kappa : 0.3874
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.6417 0.00000 0.00000 0.8446
## Specificity 0.8188 1.00000 1.00000 0.5592
## Pos Pred Value 0.6818 NaN NaN 0.6625
## Neg Pred Value 0.7906 0.97177 0.91129 0.7784
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2419 0.00000 0.00000 0.4274
## Detection Prevalence 0.3548 0.00000 0.00000 0.6452
## Balanced Accuracy 0.7302 0.50000 0.50000 0.7019
# GBM
train_preds <- as.data.frame(predict(object=modfit.gbm, train[,predictors]))
confusion_matrix_gbm_train <- confusionMatrix(train_preds$`predict(object = modfit.gbm, train[, predictors])`, train$New.Provider)
confusion_matrix_gbm_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 584 0 22 119
## Aramark 0 46 1 3
## Compass 12 0 118 9
## Sodexo 150 9 37 870
##
## Overall Statistics
##
## Accuracy : 0.8172
## 95% CI : (0.7994, 0.834)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6863
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7828 0.83636 0.6629 0.8691
## Specificity 0.8857 0.99792 0.9883 0.7998
## Pos Pred Value 0.8055 0.92000 0.8489 0.8161
## Neg Pred Value 0.8709 0.99534 0.9674 0.8567
## Prevalence 0.3768 0.02778 0.0899 0.5056
## Detection Rate 0.2949 0.02323 0.0596 0.4394
## Detection Prevalence 0.3662 0.02525 0.0702 0.5384
## Balanced Accuracy 0.8343 0.91714 0.8256 0.8345
test_preds <- as.data.frame(predict(object=modfit.gbm, test[,predictors]))
confusion_matrix_gbm_test <- confusionMatrix(test_preds$`predict(object = modfit.gbm, test[, predictors])`, test$New.Provider)
confusion_matrix_gbm_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 139 0 10 39
## Aramark 0 11 2 1
## Compass 5 0 19 7
## Sodexo 43 3 13 204
##
## Overall Statistics
##
## Accuracy : 0.752
## 95% CI : (0.7116, 0.7894)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5742
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7433 0.78571 0.43182 0.8127
## Specificity 0.8414 0.99378 0.97345 0.7592
## Pos Pred Value 0.7394 0.78571 0.61290 0.7757
## Neg Pred Value 0.8442 0.99378 0.94624 0.7983
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2802 0.02218 0.03831 0.4113
## Detection Prevalence 0.3790 0.02823 0.06250 0.5302
## Balanced Accuracy 0.7924 0.88975 0.70263 0.7860
# MLogit
prediction_mlogit_train <- predict(modfit.mlogit, train, type="class")
confusion_matrix_mlogit_train <- confusionMatrix(prediction_mlogit_train, train$New.Provider)
confusion_matrix_mlogit_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 376 7 31 185
## Aramark 8 22 4 6
## Compass 15 5 57 30
## Sodexo 347 21 86 780
##
## Overall Statistics
##
## Accuracy : 0.6237
## 95% CI : (0.602, 0.6451)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3347
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5040 0.40000 0.32022 0.7792
## Specificity 0.8193 0.99065 0.97225 0.5363
## Pos Pred Value 0.6277 0.55000 0.53271 0.6321
## Neg Pred Value 0.7321 0.98299 0.93540 0.7038
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.1899 0.01111 0.02879 0.3939
## Detection Prevalence 0.3025 0.02020 0.05404 0.6232
## Balanced Accuracy 0.6617 0.69532 0.64624 0.6577
prediction_mlogit_test <- predict(modfit.mlogit, test, type="class")
confusion_matrix_mlogit_test <- confusionMatrix(prediction_mlogit_test, test$New.Provider)
confusion_matrix_mlogit_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 102 2 8 39
## Aramark 1 8 3 0
## Compass 5 0 13 11
## Sodexo 79 4 20 201
##
## Overall Statistics
##
## Accuracy : 0.6532
## 95% CI : (0.6095, 0.6951)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 2.588e-11
##
## Kappa : 0.3908
##
## Mcnemar's Test P-Value : 0.0004802
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5455 0.57143 0.29545 0.8008
## Specificity 0.8414 0.99170 0.96460 0.5796
## Pos Pred Value 0.6755 0.66667 0.44828 0.6612
## Neg Pred Value 0.7536 0.98760 0.93362 0.7396
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2056 0.01613 0.02621 0.4052
## Detection Prevalence 0.3044 0.02419 0.05847 0.6129
## Balanced Accuracy 0.6934 0.78156 0.63003 0.6902
# Naive Bayes
prediction_naivebayes_train <- predict(modfit.naivebayes, train, type="class")
confusion_matrix_naivebayes_train <- confusionMatrix(prediction_naivebayes_train, train$New.Provider)
confusion_matrix_naivebayes_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 277 3 13 141
## Aramark 106 37 21 196
## Compass 156 15 113 241
## Sodexo 207 0 31 423
##
## Overall Statistics
##
## Accuracy : 0.4293
## 95% CI : (0.4074, 0.4514)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.2071
##
## Mcnemar's Test P-Value : <2e-16
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.3713 0.67273 0.63483 0.4226
## Specificity 0.8728 0.83221 0.77137 0.7569
## Pos Pred Value 0.6382 0.10278 0.21524 0.6399
## Neg Pred Value 0.6966 0.98889 0.95533 0.5618
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.1399 0.01869 0.05707 0.2136
## Detection Prevalence 0.2192 0.18182 0.26515 0.3338
## Balanced Accuracy 0.6220 0.75247 0.70310 0.5897
prediction_naivebayes_test <- predict(modfit.naivebayes, test, type="class")
confusion_matrix_naivebayes_test <- confusionMatrix(prediction_naivebayes_test, test$New.Provider)
confusion_matrix_naivebayes_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 77 0 3 33
## Aramark 18 7 5 61
## Compass 40 7 30 57
## Sodexo 52 0 6 100
##
## Overall Statistics
##
## Accuracy : 0.4315
## 95% CI : (0.3874, 0.4763)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 0.9996
##
## Kappa : 0.2145
##
## Mcnemar's Test P-Value : <2e-16
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.4118 0.50000 0.68182 0.3984
## Specificity 0.8835 0.82573 0.76991 0.7633
## Pos Pred Value 0.6814 0.07692 0.22388 0.6329
## Neg Pred Value 0.7128 0.98272 0.96133 0.5533
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.1552 0.01411 0.06048 0.2016
## Detection Prevalence 0.2278 0.18347 0.27016 0.3185
## Balanced Accuracy 0.6476 0.66286 0.72586 0.5808
## SVM
prediction_svm_train <- predict(modfit.svm, train, type="class")
confusion_matrix_svm_train <- confusionMatrix(prediction_svm_train, train$New.Provider)
confusion_matrix_svm_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 452 8 46 172
## Aramark 0 0 0 0
## Compass 0 0 5 0
## Sodexo 294 47 127 829
##
## Overall Statistics
##
## Accuracy : 0.6495
## 95% CI : (0.628, 0.6705)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3504
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.6059 0.00000 0.028090 0.8282
## Specificity 0.8169 1.00000 1.000000 0.5220
## Pos Pred Value 0.6667 NaN 1.000000 0.6392
## Neg Pred Value 0.7742 0.97222 0.912405 0.7482
## Prevalence 0.3768 0.02778 0.089899 0.5056
## Detection Rate 0.2283 0.00000 0.002525 0.4187
## Detection Prevalence 0.3424 0.00000 0.002525 0.6551
## Balanced Accuracy 0.7114 0.50000 0.514045 0.6751
prediction_svm_test <- predict(modfit.svm, test, type="class")
confusion_matrix_svm_test <- confusionMatrix(prediction_svm_test, test$New.Provider)
confusion_matrix_svm_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 111 1 11 41
## Aramark 0 0 0 0
## Compass 0 0 1 0
## Sodexo 76 13 32 210
##
## Overall Statistics
##
## Accuracy : 0.6492
## 95% CI : (0.6054, 0.6912)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 8.75e-11
##
## Kappa : 0.3473
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5936 0.00000 0.022727 0.8367
## Specificity 0.8285 1.00000 1.000000 0.5061
## Pos Pred Value 0.6768 NaN 1.000000 0.6344
## Neg Pred Value 0.7711 0.97177 0.913131 0.7515
## Prevalence 0.3770 0.02823 0.088710 0.5060
## Detection Rate 0.2238 0.00000 0.002016 0.4234
## Detection Prevalence 0.3306 0.00000 0.002016 0.6673
## Balanced Accuracy 0.7110 0.50000 0.511364 0.6714
#RF
ens_input_rf_train_predict <- predict(modfit.rf, train, type="prob")
colnames(ens_input_rf_train_predict)<-paste(colnames(ens_input_rf_train_predict),"rf",sep="_")
#GBM
ens_input_gbm_train_predict<- predict(modfit.gbm,train, type="prob")
colnames(ens_input_gbm_train_predict)<-paste(colnames(ens_input_gbm_train_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_train_predict <- predict(modfit.mlogit, train, type="prob")
colnames(ens_input_ml_train_predict)<-paste(colnames(ens_input_ml_train_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, train, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_train_predict <- predict(modfit.svm, train,decision.values = FALSE, probability = TRUE)
ens_input_svm_train_predict<- attr(ens_input_svm_train_predict, "probabilities")
colnames(ens_input_svm_train_predict)<-paste(colnames(ens_input_svm_train_predict),"svm",sep="_")
# Ensemble with main data and predictions
ens_input <- cbind( train,ens_input_rf_train_predict,
ens_input_gbm_train_predict,
ens_input_ml_train_predict,
ens_input_nb_train_predict,
ens_input_svm_train_predict
)
modfit.rf.ens <- randomForest(New.Provider ~. , data=ens_input)
# Ensemble with predictions
New.Provider<- train$New.Provider
ens_input_new <- cbind( New.Provider,ens_input_rf_train_predict,
ens_input_gbm_train_predict,
ens_input_ml_train_predict,
ens_input_nb_train_predict,
ens_input_svm_train_predict
)
modfit.rf.ens_new <- randomForest(New.Provider ~. , data=ens_input_new)
## Functions
ens_prediction <- function(input){
#RF
ens_input_rf_predict <- predict(modfit.rf, input, type="prob")
colnames(ens_input_rf_predict)<-paste(colnames(ens_input_rf_predict),"rf",sep="_")
#GBM
ens_input_gbm_predict<- predict(modfit.gbm,input, type="prob")
colnames(ens_input_gbm_predict)<-paste(colnames(ens_input_gbm_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_predict <- predict(modfit.mlogit, input, type="prob")
colnames(ens_input_ml_predict)<-paste(colnames(ens_input_ml_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, input, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_predict <- predict(modfit.svm, input,decision.values = FALSE, probability = TRUE)
ens_input_svm_predict<- attr(ens_input_svm_predict, "probabilities")
colnames(ens_input_svm_predict)<-paste(colnames(ens_input_svm_predict),"svm",sep="_")
ens_input <- cbind( input,ens_input_rf_predict,
ens_input_gbm_predict,
ens_input_ml_predict,
ens_input_nb_train_predict,
ens_input_svm_predict
)
prediction_rf_ens <- predict(modfit.rf.ens, ens_input, type="class")
return(prediction_rf_ens)
}
ens_new_prediction <- function(input){
#RF
ens_input_rf_predict <- predict(modfit.rf, input, type="prob")
colnames(ens_input_rf_predict)<-paste(colnames(ens_input_rf_predict),"rf",sep="_")
#GBM
ens_input_gbm_predict<- predict(modfit.gbm,input, type="prob")
colnames(ens_input_gbm_predict)<-paste(colnames(ens_input_gbm_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_predict <- predict(modfit.mlogit, input, type="prob")
colnames(ens_input_ml_predict)<-paste(colnames(ens_input_ml_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, input, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_predict <- predict(modfit.svm, input,decision.values = FALSE, probability = TRUE)
ens_input_svm_predict<- attr(ens_input_svm_predict, "probabilities")
colnames(ens_input_svm_predict)<-paste(colnames(ens_input_svm_predict),"svm",sep="_")
ens_input <- cbind( ens_input_rf_predict,
ens_input_gbm_predict,
ens_input_ml_predict,
ens_input_nb_train_predict,
ens_input_svm_predict
)
prediction_rf_ens <- predict(modfit.rf.ens_new, ens_input, type="class")
return(prediction_rf_ens)
}
confusionMatrix(ens_prediction(train), train$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 746 0 1 3
## Aramark 0 55 0 0
## Compass 0 0 177 0
## Sodexo 0 0 0 998
##
## Overall Statistics
##
## Accuracy : 0.998
## 95% CI : (0.9948, 0.9994)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9966
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 1.0000 1.00000 0.99438 0.9970
## Specificity 0.9968 1.00000 1.00000 1.0000
## Pos Pred Value 0.9947 1.00000 1.00000 1.0000
## Neg Pred Value 1.0000 1.00000 0.99945 0.9969
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.3768 0.02778 0.08939 0.5040
## Detection Prevalence 0.3788 0.02778 0.08939 0.5040
## Balanced Accuracy 0.9984 1.00000 0.99719 0.9985
confusionMatrix(ens_prediction(test), test$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 138 1 8 36
## Aramark 0 11 0 1
## Compass 3 0 31 5
## Sodexo 46 2 5 209
##
## Overall Statistics
##
## Accuracy : 0.7843
## 95% CI : (0.7454, 0.8197)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6318
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7380 0.78571 0.70455 0.8327
## Specificity 0.8544 0.99793 0.98230 0.7837
## Pos Pred Value 0.7541 0.91667 0.79487 0.7977
## Neg Pred Value 0.8435 0.99380 0.97155 0.8205
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2782 0.02218 0.06250 0.4214
## Detection Prevalence 0.3690 0.02419 0.07863 0.5282
## Balanced Accuracy 0.7962 0.89182 0.84342 0.8082
confusionMatrix(ens_new_prediction(train), train$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 746 0 1 3
## Aramark 0 55 0 0
## Compass 0 0 177 0
## Sodexo 0 0 0 998
##
## Overall Statistics
##
## Accuracy : 0.998
## 95% CI : (0.9948, 0.9994)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9966
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 1.0000 1.00000 0.99438 0.9970
## Specificity 0.9968 1.00000 1.00000 1.0000
## Pos Pred Value 0.9947 1.00000 1.00000 1.0000
## Neg Pred Value 1.0000 1.00000 0.99945 0.9969
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.3768 0.02778 0.08939 0.5040
## Detection Prevalence 0.3788 0.02778 0.08939 0.5040
## Balanced Accuracy 0.9984 1.00000 0.99719 0.9985
confusionMatrix(ens_new_prediction(test), test$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 140 0 8 34
## Aramark 0 11 0 1
## Compass 4 0 30 5
## Sodexo 43 3 6 211
##
## Overall Statistics
##
## Accuracy : 0.7903
## 95% CI : (0.7518, 0.8253)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.642
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7487 0.78571 0.68182 0.8406
## Specificity 0.8641 0.99793 0.98009 0.7878
## Pos Pred Value 0.7692 0.91667 0.76923 0.8023
## Neg Pred Value 0.8503 0.99380 0.96937 0.8283
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2823 0.02218 0.06048 0.4254
## Detection Prevalence 0.3669 0.02419 0.07863 0.5302
## Balanced Accuracy 0.8064 0.89182 0.83095 0.8142
str_emphasis_names <- colnames(Startegic_Emphasis_Data_Combined)
str_emphasis_names <-str_emphasis_names[str_emphasis_names != "Fiscal Year Sodexo"]
all_variables <- c(outcome, variables,str_emphasis_names)
train_main$`Fiscal Year Sodexo` <- as.double(train_main$`Fiscal Year Sodexo` )
train_main_new <- full_join(train_main, Startegic_Emphasis_Data, by = "Fiscal Year Sodexo")
train_main_new <- full_join(train_main_new, Startegic_Emphasis_Data_Combined, by = "Fiscal Year Sodexo")
test_main$`Fiscal Year Sodexo` <- as.double(test_main$`Fiscal Year Sodexo` )
test_main_new <- full_join(test_main, Startegic_Emphasis_Data, by = "Fiscal Year Sodexo")
test_main_new <- full_join(test_main_new, Startegic_Emphasis_Data_Combined, by = "Fiscal Year Sodexo")
outcome <- "New Provider"
variables <- c("Old Provider","Opportunity Global Region","Service Level 1","Strategic Segment L1","Regional Strategic Account","Revenue (Annual) (converted) in Mill Euros","Margin (GP/BR)" )
train <- train_main_new %>% dplyr::select(all_of(all_variables))
test <- test_main_new %>% dplyr::select(all_of(all_variables))
colnames(train) <- c("New.Provider","Old.Provider","Opportunity.Global.Region","Service.Level.1","Strategic.Segment.L1","Regional.Strategic.Account","Revenue.Mill.Euros","Margin",str_emphasis_names )
colnames(test) <- c("New.Provider","Old.Provider","Opportunity.Global.Region","Service.Level.1","Strategic.Segment.L1","Regional.Strategic.Account","Revenue.Mill.Euros","Margin",str_emphasis_names )
library(randomForest)
modfit.rf <- randomForest(New.Provider ~. , data=train)
## GBM
library(caret)
myControl <- trainControl(method='cv', number=5, returnResamp='none')
labelName <- 'New.Provider'
predictors <- names(train)[names(train) != labelName]
modfit.gbm <- train(train[,predictors], train[,labelName], method='gbm', trControl=myControl)
f <- as.formula(paste(labelName, paste(predictors, collapse = " + "),sep = "~"))
modfit.mlogit <- multinom(f, data = train)
library(e1071)
modfit.naivebayes <- e1071::naiveBayes(New.Provider ~ ., data = train)
modfit.svm <- e1071::svm(New.Provider ~ ., data = train,probability = TRUE)
#RF
prediction_rf_train <- predict(modfit.rf, train, type="class")
confusion_matrix_rf_train <- confusionMatrix(prediction_rf_train, train$New.Provider)
confusion_matrix_rf_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 540 6 49 102
## Aramark 0 18 0 0
## Compass 1 0 56 0
## Sodexo 205 31 73 899
##
## Overall Statistics
##
## Accuracy : 0.7641
## 95% CI : (0.7448, 0.7827)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5759
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7239 0.327273 0.31461 0.8981
## Specificity 0.8728 1.000000 0.99945 0.6844
## Pos Pred Value 0.7747 1.000000 0.98246 0.7442
## Neg Pred Value 0.8394 0.981142 0.93656 0.8679
## Prevalence 0.3768 0.027778 0.08990 0.5056
## Detection Rate 0.2727 0.009091 0.02828 0.4540
## Detection Prevalence 0.3520 0.009091 0.02879 0.6101
## Balanced Accuracy 0.7983 0.663636 0.65703 0.7912
prediction_rf_test <- predict(modfit.rf, test, type="class")
confusion_matrix_rf_test <- confusionMatrix(prediction_rf_test, test$New.Provider)
confusion_matrix_rf_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 126 2 13 27
## Aramark 0 3 0 0
## Compass 0 1 11 1
## Sodexo 61 8 20 223
##
## Overall Statistics
##
## Accuracy : 0.7319
## 95% CI : (0.6906, 0.7704)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5138
##
## Mcnemar's Test P-Value : 6.338e-10
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.6738 0.214286 0.25000 0.8884
## Specificity 0.8641 1.000000 0.99558 0.6367
## Pos Pred Value 0.7500 1.000000 0.84615 0.7147
## Neg Pred Value 0.8140 0.977688 0.93168 0.8478
## Prevalence 0.3770 0.028226 0.08871 0.5060
## Detection Rate 0.2540 0.006048 0.02218 0.4496
## Detection Prevalence 0.3387 0.006048 0.02621 0.6290
## Balanced Accuracy 0.7689 0.607143 0.62279 0.7626
# GBM
train_preds <- as.data.frame(predict(object=modfit.gbm, train[,predictors]))
confusion_matrix_gbm_train <- confusionMatrix(train_preds$`predict(object = modfit.gbm, train[, predictors])`, train$New.Provider)
confusion_matrix_gbm_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 587 1 24 107
## Aramark 0 45 1 3
## Compass 12 1 116 12
## Sodexo 147 8 37 879
##
## Overall Statistics
##
## Accuracy : 0.8217
## 95% CI : (0.8041, 0.8383)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.694
##
## Mcnemar's Test P-Value : 0.0001935
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7869 0.81818 0.65169 0.8781
## Specificity 0.8930 0.99792 0.98613 0.8039
## Pos Pred Value 0.8164 0.91837 0.82270 0.8207
## Neg Pred Value 0.8739 0.99482 0.96629 0.8658
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.2965 0.02273 0.05859 0.4439
## Detection Prevalence 0.3631 0.02475 0.07121 0.5409
## Balanced Accuracy 0.8399 0.90805 0.81891 0.8410
test_preds <- as.data.frame(predict(object=modfit.gbm, test[,predictors]))
confusion_matrix_gbm_test <- confusionMatrix(test_preds$`predict(object = modfit.gbm, test[, predictors])`, test$New.Provider)
confusion_matrix_gbm_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 136 1 9 37
## Aramark 0 11 3 0
## Compass 5 0 18 6
## Sodexo 46 2 14 208
##
## Overall Statistics
##
## Accuracy : 0.752
## 95% CI : (0.7116, 0.7894)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.572
##
## Mcnemar's Test P-Value : 0.07901
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7273 0.78571 0.40909 0.8287
## Specificity 0.8479 0.99378 0.97566 0.7469
## Pos Pred Value 0.7432 0.78571 0.62069 0.7704
## Neg Pred Value 0.8371 0.99378 0.94433 0.8097
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2742 0.02218 0.03629 0.4194
## Detection Prevalence 0.3690 0.02823 0.05847 0.5444
## Balanced Accuracy 0.7876 0.88975 0.69238 0.7878
# MLogit
prediction_mlogit_train <- predict(modfit.mlogit, train, type="class")
confusion_matrix_mlogit_train <- confusionMatrix(prediction_mlogit_train, train$New.Provider)
confusion_matrix_mlogit_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 376 7 31 185
## Aramark 8 22 4 6
## Compass 15 5 57 30
## Sodexo 347 21 86 780
##
## Overall Statistics
##
## Accuracy : 0.6237
## 95% CI : (0.602, 0.6451)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3347
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5040 0.40000 0.32022 0.7792
## Specificity 0.8193 0.99065 0.97225 0.5363
## Pos Pred Value 0.6277 0.55000 0.53271 0.6321
## Neg Pred Value 0.7321 0.98299 0.93540 0.7038
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.1899 0.01111 0.02879 0.3939
## Detection Prevalence 0.3025 0.02020 0.05404 0.6232
## Balanced Accuracy 0.6617 0.69532 0.64624 0.6577
prediction_mlogit_test <- predict(modfit.mlogit, test, type="class")
confusion_matrix_mlogit_test <- confusionMatrix(prediction_mlogit_test, test$New.Provider)
confusion_matrix_mlogit_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 102 2 8 39
## Aramark 1 8 3 0
## Compass 5 0 13 11
## Sodexo 79 4 20 201
##
## Overall Statistics
##
## Accuracy : 0.6532
## 95% CI : (0.6095, 0.6951)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 2.588e-11
##
## Kappa : 0.3908
##
## Mcnemar's Test P-Value : 0.0004802
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5455 0.57143 0.29545 0.8008
## Specificity 0.8414 0.99170 0.96460 0.5796
## Pos Pred Value 0.6755 0.66667 0.44828 0.6612
## Neg Pred Value 0.7536 0.98760 0.93362 0.7396
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2056 0.01613 0.02621 0.4052
## Detection Prevalence 0.3044 0.02419 0.05847 0.6129
## Balanced Accuracy 0.6934 0.78156 0.63003 0.6902
# Naive Bayes
prediction_naivebayes_train <- predict(modfit.naivebayes, train, type="class")
confusion_matrix_naivebayes_train <- confusionMatrix(prediction_naivebayes_train, train$New.Provider)
confusion_matrix_naivebayes_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 364 7 39 246
## Aramark 54 35 15 107
## Compass 64 11 64 137
## Sodexo 264 2 60 511
##
## Overall Statistics
##
## Accuracy : 0.4919
## 95% CI : (0.4697, 0.5142)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : 0.8918
##
## Kappa : 0.2135
##
## Mcnemar's Test P-Value : <2e-16
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.4879 0.63636 0.35955 0.5105
## Specificity 0.7634 0.90857 0.88235 0.6670
## Pos Pred Value 0.5549 0.16588 0.23188 0.6105
## Neg Pred Value 0.7115 0.98869 0.93310 0.5713
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.1838 0.01768 0.03232 0.2581
## Detection Prevalence 0.3313 0.10657 0.13939 0.4227
## Balanced Accuracy 0.6257 0.77247 0.62095 0.5887
prediction_naivebayes_test <- predict(modfit.naivebayes, test, type="class")
confusion_matrix_naivebayes_test <- confusionMatrix(prediction_naivebayes_test, test$New.Provider)
confusion_matrix_naivebayes_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 100 0 10 49
## Aramark 9 5 3 35
## Compass 17 7 20 39
## Sodexo 61 2 11 128
##
## Overall Statistics
##
## Accuracy : 0.5101
## 95% CI : (0.4651, 0.5549)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 0.4465
##
## Kappa : 0.2523
##
## Mcnemar's Test P-Value : 7.755e-11
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5348 0.35714 0.45455 0.5100
## Specificity 0.8091 0.90249 0.86062 0.6980
## Pos Pred Value 0.6289 0.09615 0.24096 0.6337
## Neg Pred Value 0.7418 0.97973 0.94189 0.5816
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2016 0.01008 0.04032 0.2581
## Detection Prevalence 0.3206 0.10484 0.16734 0.4073
## Balanced Accuracy 0.6719 0.62982 0.65758 0.6040
## SVM
prediction_svm_train <- predict(modfit.svm, train, type="class")
confusion_matrix_svm_train <- confusionMatrix(prediction_svm_train, train$New.Provider)
confusion_matrix_svm_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 404 8 44 192
## Aramark 0 0 0 0
## Compass 6 0 26 5
## Sodexo 336 47 108 804
##
## Overall Statistics
##
## Accuracy : 0.6232
## 95% CI : (0.6015, 0.6446)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3079
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5416 0.00000 0.14607 0.8032
## Specificity 0.8023 1.00000 0.99390 0.4985
## Pos Pred Value 0.6235 NaN 0.70270 0.6208
## Neg Pred Value 0.7432 0.97222 0.92177 0.7124
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.2040 0.00000 0.01313 0.4061
## Detection Prevalence 0.3273 0.00000 0.01869 0.6540
## Balanced Accuracy 0.6719 0.50000 0.56998 0.6508
prediction_svm_test <- predict(modfit.svm, test, type="class")
confusion_matrix_svm_test <- confusionMatrix(prediction_svm_test, test$New.Provider)
confusion_matrix_svm_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 112 1 13 38
## Aramark 0 0 0 0
## Compass 0 0 4 1
## Sodexo 75 13 27 212
##
## Overall Statistics
##
## Accuracy : 0.6613
## 95% CI : (0.6178, 0.7029)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : 2.038e-12
##
## Kappa : 0.3737
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.5989 0.00000 0.090909 0.8446
## Specificity 0.8317 1.00000 0.997788 0.5306
## Pos Pred Value 0.6829 NaN 0.800000 0.6483
## Neg Pred Value 0.7741 0.97177 0.918534 0.7692
## Prevalence 0.3770 0.02823 0.088710 0.5060
## Detection Rate 0.2258 0.00000 0.008065 0.4274
## Detection Prevalence 0.3306 0.00000 0.010081 0.6593
## Balanced Accuracy 0.7153 0.50000 0.544348 0.6876
#RF
ens_input_rf_train_predict <- predict(modfit.rf, train, type="prob")
colnames(ens_input_rf_train_predict)<-paste(colnames(ens_input_rf_train_predict),"rf",sep="_")
#GBM
ens_input_gbm_train_predict<- predict(modfit.gbm,train, type="prob")
colnames(ens_input_gbm_train_predict)<-paste(colnames(ens_input_gbm_train_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_train_predict <- predict(modfit.mlogit, train, type="prob")
colnames(ens_input_ml_train_predict)<-paste(colnames(ens_input_ml_train_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, train, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_train_predict <- predict(modfit.svm, train,decision.values = FALSE, probability = TRUE)
ens_input_svm_train_predict<- attr(ens_input_svm_train_predict, "probabilities")
colnames(ens_input_svm_train_predict)<-paste(colnames(ens_input_svm_train_predict),"svm",sep="_")
# Ensemble with main data and predictions
ens_input <- cbind( train,ens_input_rf_train_predict,
ens_input_gbm_train_predict,
ens_input_ml_train_predict,
ens_input_nb_train_predict,
ens_input_svm_train_predict
)
modfit.rf.ens <- randomForest(New.Provider ~. , data=ens_input)
# Ensemble with predictions
New.Provider<- train$New.Provider
ens_input_new <- cbind( New.Provider,ens_input_rf_train_predict,
ens_input_gbm_train_predict,
ens_input_ml_train_predict,
ens_input_nb_train_predict,
ens_input_svm_train_predict
)
modfit.rf.ens_new <- randomForest(New.Provider ~. , data=ens_input_new)
## Functions
ens_prediction <- function(input){
#RF
ens_input_rf_predict <- predict(modfit.rf, input, type="prob")
colnames(ens_input_rf_predict)<-paste(colnames(ens_input_rf_predict),"rf",sep="_")
#GBM
ens_input_gbm_predict<- predict(modfit.gbm,input, type="prob")
colnames(ens_input_gbm_predict)<-paste(colnames(ens_input_gbm_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_predict <- predict(modfit.mlogit, input, type="prob")
colnames(ens_input_ml_predict)<-paste(colnames(ens_input_ml_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, input, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_predict <- predict(modfit.svm, input,decision.values = FALSE, probability = TRUE)
ens_input_svm_predict<- attr(ens_input_svm_predict, "probabilities")
colnames(ens_input_svm_predict)<-paste(colnames(ens_input_svm_predict),"svm",sep="_")
ens_input <- cbind( input,ens_input_rf_predict,
ens_input_gbm_predict,
ens_input_ml_predict,
ens_input_nb_train_predict,
ens_input_svm_predict
)
prediction_rf_ens <- predict(modfit.rf.ens, ens_input, type="class")
return(prediction_rf_ens)
}
ens_new_prediction <- function(input){
#RF
ens_input_rf_predict <- predict(modfit.rf, input, type="prob")
colnames(ens_input_rf_predict)<-paste(colnames(ens_input_rf_predict),"rf",sep="_")
#GBM
ens_input_gbm_predict<- predict(modfit.gbm,input, type="prob")
colnames(ens_input_gbm_predict)<-paste(colnames(ens_input_gbm_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_predict <- predict(modfit.mlogit, input, type="prob")
colnames(ens_input_ml_predict)<-paste(colnames(ens_input_ml_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, input, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_predict <- predict(modfit.svm, input,decision.values = FALSE, probability = TRUE)
ens_input_svm_predict<- attr(ens_input_svm_predict, "probabilities")
colnames(ens_input_svm_predict)<-paste(colnames(ens_input_svm_predict),"svm",sep="_")
ens_input <- cbind( ens_input_rf_predict,
ens_input_gbm_predict,
ens_input_ml_predict,
ens_input_nb_train_predict,
ens_input_svm_predict
)
prediction_rf_ens <- predict(modfit.rf.ens_new, ens_input, type="class")
return(prediction_rf_ens)
}
confusionMatrix(ens_prediction(train), train$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 746 0 1 3
## Aramark 0 55 0 0
## Compass 0 0 177 0
## Sodexo 0 0 0 998
##
## Overall Statistics
##
## Accuracy : 0.998
## 95% CI : (0.9948, 0.9994)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9966
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 1.0000 1.00000 0.99438 0.9970
## Specificity 0.9968 1.00000 1.00000 1.0000
## Pos Pred Value 0.9947 1.00000 1.00000 1.0000
## Neg Pred Value 1.0000 1.00000 0.99945 0.9969
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.3768 0.02778 0.08939 0.5040
## Detection Prevalence 0.3788 0.02778 0.08939 0.5040
## Balanced Accuracy 0.9984 1.00000 0.99719 0.9985
confusionMatrix(ens_prediction(test), test$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 146 1 8 37
## Aramark 0 11 0 0
## Compass 4 0 31 5
## Sodexo 37 2 5 209
##
## Overall Statistics
##
## Accuracy : 0.8004
## 95% CI : (0.7625, 0.8347)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6606
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7807 0.78571 0.70455 0.8327
## Specificity 0.8511 1.00000 0.98009 0.8204
## Pos Pred Value 0.7604 1.00000 0.77500 0.8261
## Neg Pred Value 0.8651 0.99381 0.97149 0.8272
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2944 0.02218 0.06250 0.4214
## Detection Prevalence 0.3871 0.02218 0.08065 0.5101
## Balanced Accuracy 0.8159 0.89286 0.84232 0.8265
confusionMatrix(ens_new_prediction(train), train$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 746 0 1 3
## Aramark 0 55 0 0
## Compass 0 0 177 0
## Sodexo 0 0 0 998
##
## Overall Statistics
##
## Accuracy : 0.998
## 95% CI : (0.9948, 0.9994)
## No Information Rate : 0.5056
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9966
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 1.0000 1.00000 0.99438 0.9970
## Specificity 0.9968 1.00000 1.00000 1.0000
## Pos Pred Value 0.9947 1.00000 1.00000 1.0000
## Neg Pred Value 1.0000 1.00000 0.99945 0.9969
## Prevalence 0.3768 0.02778 0.08990 0.5056
## Detection Rate 0.3768 0.02778 0.08939 0.5040
## Detection Prevalence 0.3788 0.02778 0.08939 0.5040
## Balanced Accuracy 0.9984 1.00000 0.99719 0.9985
confusionMatrix(ens_new_prediction(test), test$New.Provider)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Other Aramark Compass Sodexo
## Other 145 1 9 32
## Aramark 0 10 0 2
## Compass 4 1 30 5
## Sodexo 38 2 5 212
##
## Overall Statistics
##
## Accuracy : 0.8004
## 95% CI : (0.7625, 0.8347)
## No Information Rate : 0.506
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6604
##
## Mcnemar's Test P-Value : 0.6177
##
## Statistics by Class:
##
## Class: Other Class: Aramark Class: Compass Class: Sodexo
## Sensitivity 0.7754 0.71429 0.68182 0.8446
## Specificity 0.8641 0.99585 0.97788 0.8163
## Pos Pred Value 0.7754 0.83333 0.75000 0.8249
## Neg Pred Value 0.8641 0.99174 0.96930 0.8368
## Prevalence 0.3770 0.02823 0.08871 0.5060
## Detection Rate 0.2923 0.02016 0.06048 0.4274
## Detection Prevalence 0.3770 0.02419 0.08065 0.5181
## Balanced Accuracy 0.8197 0.85507 0.82985 0.8305
# Ensemble with predictions for Probability
New.Provider<- train$New.Provider
ens_input_new <- cbind( New.Provider,ens_input_rf_train_predict,
ens_input_gbm_train_predict,
ens_input_ml_train_predict,
ens_input_nb_train_predict,
ens_input_svm_train_predict
)
modfit.rf.ens_new_prob <- randomForest(New.Provider ~. , data=ens_input_new, probability = TRUE)
ens_new_prob_prediction <- function(input){
#RF
ens_input_rf_predict <- predict(modfit.rf, input, type="prob")
colnames(ens_input_rf_predict)<-paste(colnames(ens_input_rf_predict),"rf",sep="_")
#GBM
ens_input_gbm_predict<- predict(modfit.gbm,input, type="prob")
colnames(ens_input_gbm_predict)<-paste(colnames(ens_input_gbm_predict),"gbm",sep="_")
#Mlogit
ens_input_ml_predict <- predict(modfit.mlogit, input, type="prob")
colnames(ens_input_ml_predict)<-paste(colnames(ens_input_ml_predict),"ml",sep="_")
# Naive Bayes
ens_input_nb_train_predict <- predict(modfit.naivebayes, input, type="raw")
colnames(ens_input_nb_train_predict)<-paste(colnames(ens_input_nb_train_predict),"nb",sep="_")
# SVM
ens_input_svm_predict <- predict(modfit.svm, input,decision.values = FALSE, probability = TRUE)
ens_input_svm_predict<- attr(ens_input_svm_predict, "probabilities")
colnames(ens_input_svm_predict)<-paste(colnames(ens_input_svm_predict),"svm",sep="_")
ens_input <- cbind( ens_input_rf_predict,
ens_input_gbm_predict,
ens_input_ml_predict,
ens_input_nb_train_predict,
ens_input_svm_predict
)
prediction_rf_ens <- predict(modfit.rf.ens_new_prob, ens_input,type="prob")
return(prediction_rf_ens)
}
fdata <- rbind( train, test)
sapply(fdata,is.factor)
## New.Provider Old.Provider
## TRUE TRUE
## Opportunity.Global.Region Service.Level.1
## TRUE TRUE
## Strategic.Segment.L1 Regional.Strategic.Account
## TRUE TRUE
## Revenue.Mill.Euros Margin
## FALSE FALSE
## isb_Aramark isb_Compass
## FALSE FALSE
## isb_Sodexo psq_Aramark
## FALSE FALSE
## psq_Compass psq_Sodexo
## FALSE FALSE
## pb_Aramark pb_Compass
## FALSE FALSE
## pb_Sodexo pm_Aramark
## FALSE FALSE
## pm_Compass pm_Sodexo
## FALSE FALSE
## com_Aramark com_Compass
## FALSE FALSE
## com_Sodexo saf_Aramark
## FALSE FALSE
## saf_Compass saf_Sodexo
## FALSE FALSE
## sus_Aramark sus_Compass
## FALSE FALSE
## sus_Sodexo oss_Aramark
## FALSE FALSE
## oss_Compass oss_Sodexo
## FALSE FALSE
simulated_data<- expand.grid(
Old.Provider = unique(fdata$Old.Provider),
Opportunity.Global.Region = unique(fdata$Opportunity.Global.Region),
Service.Level.1 = unique(fdata$Service.Level.1),
Strategic.Segment.L1 = unique(fdata$Strategic.Segment.L1),
Regional.Strategic.Account = unique(fdata$Regional.Strategic.Account)
)
is.fact <- sapply(fdata, is.factor)
not.factors.df <- fdata[, !is.fact]
means <- as.data.frame(t(colMeans(not.factors.df)))
simulated_data <- cbind(simulated_data,means )
head(simulated_data)
## Old.Provider Opportunity.Global.Region Service.Level.1 Strategic.Segment.L1
## 1 Compass NORAM Soft FM Mining
## 2 Other NORAM Soft FM Mining
## 3 Sodexo NORAM Soft FM Mining
## 4 Aramark NORAM Soft FM Mining
## 5 Compass BRAZIL Soft FM Mining
## 6 Other BRAZIL Soft FM Mining
## Regional.Strategic.Account Revenue.Mill.Euros Margin isb_Aramark
## 1 0 1.358793 14.44377 10.00878
## 2 0 1.358793 14.44377 10.00878
## 3 0 1.358793 14.44377 10.00878
## 4 0 1.358793 14.44377 10.00878
## 5 0 1.358793 14.44377 10.00878
## 6 0 1.358793 14.44377 10.00878
## isb_Compass isb_Sodexo psq_Aramark psq_Compass psq_Sodexo pb_Aramark
## 1 2.326788 1.49172 20.46673 11.88457 29.28386 12.92972
## 2 2.326788 1.49172 20.46673 11.88457 29.28386 12.92972
## 3 2.326788 1.49172 20.46673 11.88457 29.28386 12.92972
## 4 2.326788 1.49172 20.46673 11.88457 29.28386 12.92972
## 5 2.326788 1.49172 20.46673 11.88457 29.28386 12.92972
## 6 2.326788 1.49172 20.46673 11.88457 29.28386 12.92972
## pb_Compass pb_Sodexo pm_Aramark pm_Compass pm_Sodexo com_Aramark com_Compass
## 1 1.052027 0.1134526 1.621887 7.247475 8.539377 2.401373 3.02877
## 2 1.052027 0.1134526 1.621887 7.247475 8.539377 2.401373 3.02877
## 3 1.052027 0.1134526 1.621887 7.247475 8.539377 2.401373 3.02877
## 4 1.052027 0.1134526 1.621887 7.247475 8.539377 2.401373 3.02877
## 5 1.052027 0.1134526 1.621887 7.247475 8.539377 2.401373 3.02877
## 6 1.052027 0.1134526 1.621887 7.247475 8.539377 2.401373 3.02877
## com_Sodexo saf_Aramark saf_Compass saf_Sodexo sus_Aramark sus_Compass
## 1 0.9122038 3.167799 5.648939 3.304156 3.834066 7.316548
## 2 0.9122038 3.167799 5.648939 3.304156 3.834066 7.316548
## 3 0.9122038 3.167799 5.648939 3.304156 3.834066 7.316548
## 4 0.9122038 3.167799 5.648939 3.304156 3.834066 7.316548
## 5 0.9122038 3.167799 5.648939 3.304156 3.834066 7.316548
## 6 0.9122038 3.167799 5.648939 3.304156 3.834066 7.316548
## sus_Sodexo oss_Aramark oss_Compass oss_Sodexo
## 1 14.63913 3.426682 0.03233892 0.03054534
## 2 14.63913 3.426682 0.03233892 0.03054534
## 3 14.63913 3.426682 0.03233892 0.03054534
## 4 14.63913 3.426682 0.03233892 0.03054534
## 5 14.63913 3.426682 0.03233892 0.03054534
## 6 14.63913 3.426682 0.03233892 0.03054534
simulated_data_predictions <- cbind(simulated_data,as.data.frame(ens_new_prob_prediction(simulated_data)))
plot_data <- simulated_data_predictions %>% group_by(Old.Provider)%>% summarise_at(vars(Other:Sodexo), mean, na.rm = TRUE) %>% gather(`New Provider`, Probability, Other :Sodexo)%>% mutate(Probability = round( Probability*100, digits = 2)) %>% ggplot( aes(fill=`New Provider`, y=Probability, x=Old.Provider, label=Probability))+ ylim(0,100)+ geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 2.5) +
geom_bar(position="dodge", stat="identity")+scale_fill_brewer(palette="Blues")+
#theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_rect(fill = "transparent",colour = NA),
#plot.background = element_rect(fill = "transparent",colour = NA) )
labs( x ="Old Provider", y = "Predicted Probability (%)")+
ggtitle("PDP Plot for Predicted Win Probabilities by Old Provider") +
theme(plot.title = element_text(hjust = 0.5))
plot_data
plot_data <- simulated_data_predictions %>% group_by(Opportunity.Global.Region)%>% summarise_at(vars(Other:Sodexo), mean, na.rm = TRUE) %>% gather(`New Provider`, Probability, Other :Sodexo)%>% mutate(Probability = round( Probability*100, digits = 2)) %>% ggplot( aes(fill=`New Provider`, y=Probability, x=Opportunity.Global.Region, label = Probability))+ ylim(0,100)+ geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 2.5) +
geom_bar(position="dodge", stat="identity")+scale_fill_brewer(palette="Blues")+
labs( x ="Opportunity Global Region", y = "Predicted Probability (%)")+
ggtitle("PDP Plot for Predicted Win Probabilities by Opportunity Global Region") +
theme(plot.title = element_text(hjust = 0.5))
plot_data
plot_data <- simulated_data_predictions %>% group_by(Service.Level.1)%>% summarise_at(vars(Other:Sodexo), mean, na.rm = TRUE) %>% gather(`New Provider`, Probability, Other :Sodexo)%>% mutate(Probability = round( Probability*100, digits = 2)) %>% ggplot( aes(fill=`New Provider`, y=Probability, x=Service.Level.1, label = Probability))+ ylim(0,100)+ geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 2.5) +
geom_bar(position="dodge", stat="identity")+scale_fill_brewer(palette="Blues")+
labs( x ="Service Level 1", y = "Predicted Probability (%)")+
ggtitle("PDP Plot for Predicted Win Probabilities by Service Level 1") +
theme(plot.title = element_text(hjust = 0.5))
plot_data
plot_data <- simulated_data_predictions %>% group_by(Strategic.Segment.L1 )%>% summarise_at(vars(Other:Sodexo), mean, na.rm = TRUE) %>% gather(`New Provider`, Probability, Other :Sodexo)%>% mutate(Probability = round( Probability*100, digits = 2)) %>% ggplot( aes(fill=`New Provider`, y=Probability, x=Strategic.Segment.L1 , label = Probability))+ ylim(0,100)+ geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 2.5) +
geom_bar(position="dodge", stat="identity")+scale_fill_brewer(palette="Blues")+
labs( x ="Strategic Segment L1", y = "Predicted Probability (%)")+
ggtitle("PDP Plot for Predicted Win Probabilities by Strategic Segment L1") +
theme(plot.title = element_text(hjust = 0.5))
plot_data
plot_data <- simulated_data_predictions %>% group_by(Regional.Strategic.Account )%>% summarise_at(vars(Other:Sodexo), mean, na.rm = TRUE) %>% gather(`New Provider`, Probability, Other :Sodexo)%>% mutate(Probability = round( Probability*100, digits = 2)) %>% ggplot( aes(fill=`New Provider`, y=Probability, x=Regional.Strategic.Account , label = Probability))+ ylim(0,100)+ geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 2.5) +
geom_bar(position="dodge", stat="identity")+scale_fill_brewer(palette="Blues")+
labs( x ="Regional Strategic Account", y = "Predicted Probability (%)")+
ggtitle("PDP Plot for Predicted Win Probabilities by Regional Strategic Account") +
theme(plot.title = element_text(hjust = 0.5))
knitr::opts_chunk$set(fig.width=unit(18,"cm"), fig.height=unit(11,"cm"))
plot_data
## Function to create PDP's
pdp_singl_continuous <- function(name){
pars <- as.list(match.call()[-1])
x_s <- simulated_data[,as.character(pars$name)] # grid where we want partial dependencies
#print(names(x_s))
a <- as.character(pars$name)
x_c <- dplyr::select(simulated_data, - a) # other predictors
#print(names(x_c))
x_s <- seq(min( fdata[,as.character(pars$name)]), max(fdata[,as.character(pars$name)]), length.out = 100)
simulated_data_new <- merge(x_s , x_c)
colnames(simulated_data_new)[1] <- as.character(pars$name)
simulated_data_new_predictions <- cbind(simulated_data_new,as.data.frame(ens_new_prob_prediction(simulated_data_new)))
#print(substitute(name))
pd <- simulated_data_new_predictions %>%
group_by(!!var) %>%
summarise(across(c("Other", "Aramark","Compass","Sodexo" ), mean, na.rm = TRUE))%>% gather(`New Provider`, Probability, Other :Sodexo)%>% mutate(Probability = round( Probability*100, digits = 2))
return(pd)
}
var <- quo(Revenue.Mill.Euros)
pdp_data <-pdp_singl_continuous("Revenue.Mill.Euros")
library(plotly)
fig <- plot_ly(pdp_data, x = ~Revenue.Mill.Euros, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities - Revenue in Million Euros")
fig
var <- quo(Margin)
pdp_data <-pdp_singl_continuous("Margin")
library(plotly)
fig <- plot_ly(pdp_data, x = ~Margin, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities - Margin")
fig
var <- quo(isb_Sodexo)
pdp_data <-pdp_singl_continuous("isb_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~isb_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Initial Sales and Bidding - Sodexo")
fig
var <- quo(psq_Sodexo)
pdp_data <-pdp_singl_continuous("psq_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~psq_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Product and Service Quality - Sodexo")
fig
var <- quo(pb_Sodexo)
pdp_data <-pdp_singl_continuous("pb_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~pb_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Pricing and Billing - Sodexo")
fig
var <- quo(pb_Sodexo)
pdp_data <-pdp_singl_continuous("pb_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~pb_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Pricing and Billing - Sodexo")
fig
var <- quo(pm_Sodexo)
pdp_data <-pdp_singl_continuous("pm_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~pm_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Project Management - Sodexo")
fig
var <- quo(com_Sodexo)
pdp_data <-pdp_singl_continuous("com_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~com_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Communication - Sodexo")
fig
var <- quo(saf_Sodexo)
pdp_data <-pdp_singl_continuous("saf_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~saf_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Safety - Sodexo")
fig
var <- quo(sus_Sodexo)
pdp_data <-pdp_singl_continuous("sus_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~sus_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Sustainability - Sodexo")
fig
var <- quo(oss_Sodexo)
pdp_data <-pdp_singl_continuous("oss_Sodexo")
library(plotly)
fig <- plot_ly(pdp_data, x = ~oss_Sodexo, y = ~Probability, color = ~`New Provider`)
fig <- fig %>% add_lines()%>% layout(title="PDP Plot for Predicted Win Probabilities: Ongoing Service Support - Sodexo")
fig